⌛️ FIERLENIUS 🦙¶

✍ Authors:

ROUAUD Lucas

Master 2 Bio-informatics at Université de Paris

Python 3.10.8 Conda 22.11.1 GitHub last commit GitHub stars

  • ../index.html

In [ ]:
# ==================================================

# Skip all the warnings linked to the numpy module version.

# [W]
from warnings import simplefilter
simplefilter("ignore", UserWarning)

# ==================================================

# Import packages

# [N]
import numpy as np
# [P]
import plotly.graph_objects as go

# [M]
from matplotlib import colormaps
# [O]
from os import popen
# [P]
import plotly.figure_factory as ff
from plotly import offline
# [R]
from random import seed
# [S]
from sklearn.decomposition import PCA
from sys import path
# [T]
from tqdm.notebook import tqdm

# Fix the seed of the `random` module and switch plotly to notebook mode.
seed(1)
offline.init_notebook_mode()

# Shell command prefix: a file pattern is appended to it and the result is run
# through `popen` to find the data files to load.
COMMAND: str = "ls ../../../data/peitsch2vec/"

# ==============
#
# SPECIAL IMPORT
#
# ==============

# Add the parent directory to the module search path before importing
# `context_analyzer` (presumably where that project module lives — confirm).
path.append("../")

from context_analyzer import MultipleContextAnalyzer, center_context

Vector's norm meaning without --segment¶

Loading data¶

In [ ]:
# Get data.
# Each `popen` call runs the `ls` command stored in COMMAND with a file
# pattern appended; only the first listed file is loaded.
data: object = np.load(
    popen(
        COMMAND + "default_domain/embedding*.npy").readlines()[0].strip(),
    allow_pickle=True
)
# Transposed so that `characteristic[k]` is one descriptor for every code.
characteristic: object = np.transpose(np.load(
    popen(
        COMMAND + "default_domain/characteristics*.npy").readlines()[0].strip(),
    allow_pickle=True
))
cosine: object = np.load(
    popen(COMMAND + "default_domain/matrix_cosine*.npy").readlines()[0].strip(),
    allow_pickle=True
)
context_mean: object = np.load(
    ("../../../data/context_mean.npy"),
    allow_pickle=True
)

# Code Peitsch.
peitsch: object = characteristic[0].astype("int64")
# Occurrences number in domains (same row as the hover text label).
domain: object = characteristic[5].astype("int64")
# Regular secondary structure labels, copied so they keep their original
# values.
ssr_code: object = np.array(characteristic[4])
# Not copied: the relabelling loop below overwrites these values in place
# (and so presumably also the matching row of `characteristic` — confirm).
ssr: object = characteristic[4]
# Cosine similarities.
similarity: object = cosine.flatten()

# Replace every secondary structure label by its rank among the sorted unique
# labels.
for i, ssr_i in enumerate(sorted(list(set(ssr)))):
    ssr[ssr == ssr_i] = i

print(f"{domain[:10]=}")
print(f"{peitsch[:10]=}")
print(f"{ssr[:10]=}")
print(f"{similarity[:10]=}")

# log10 of the occurrences.
frequency: object = np.log10(domain)
# -log(p) of each code, with p = occurrences / total occurrences, divided by
# the log of the number of codes.
surprise: object = - np.divide(np.log(domain / np.sum(domain)),
                               np.log(domain.shape[0]))
# Euclidean norm of each embedding vector (one per row of `data`).
norm: object = np.linalg.norm(data, axis=1)


# Start from one placeholder per code; each placeholder is replaced below by
# the row of values read from `context_mean`.
sort_context: object = list(peitsch)

# The first two rows of `context_mean` are skipped here (other cells read them
# as labels); in the remaining rows, column 0 is the Peitsch code.
for i in context_mean[2:]:
    line: int = np.where(peitsch == int(i[0]))[0][0]

    # "nan" entries are replaced by "1" before the conversion to float.
    i["nan" == i] = "1"
    sort_context[line] = list(i)[1:]

# Scale by 100: the values are displayed as percentages in the hover text.
sort_context = np.array(sort_context, dtype=float) * 100

print(f"{frequency.shape=}")
print(f"{surprise.shape=}")
print(f"{norm.shape=}")
print(f"{sort_context.shape=}")
domain[:10]=array([7065, 5093, 4243, 3160, 3124, 2341, 2310, 2175, 2135, 1937])
peitsch[:10]=array([ 5,  7,  9, 13, 11, 17, 19, 25, 15, 21])
ssr[:10]=array([3, 0, 4, 3, 0, 4, 1, 1, 0, 0], dtype=object)
similarity[:10]=array([0.99999976, 0.7355762 , 0.78994834, 0.6438811 , 0.7622997 ,
       0.68122286, 0.6563339 , 0.75481045, 0.6778948 , 0.7173601 ],
      dtype=float32)
frequency.shape=(325,)
surprise.shape=(325,)
norm.shape=(325,)
sort_context.shape=(325, 10)
In [ ]:
# Get data.
corpus: object = np.load(
    popen(
        COMMAND + "default_domain/corpus*.npy").readlines()[0].strip(),
    allow_pickle=True
)

# Three parallel lists with one value per Peitsch code, all scaled by 100:
#   [0] `distance[0]` of the centred context analysis,
#   [1] `distance[1]` of the centred context analysis,
#   [2] `distance[1]` of the analysis run on the raw (uncentred) sentences.
# Converted to an array at the end of the cell.
context_list: "list[list]" = [[], [], []]

for p_i in peitsch:
    # Gather every corpus sentence containing the current code.
    context: list = []
    for sentence in corpus:
        if f"{p_i}" in sentence:
            context += [np.array(sentence).astype(int)]

    # Project helper, called with a window of 10 around the current code and
    # -1 as the gap symbol.
    c_context: np.ndarray = center_context(
        context=context,
        window=10,
        center=p_i,
        gap_symbol=-1
    )

    # Analysis of the centred contexts; the gap symbol is skipped.
    Context: MultipleContextAnalyzer = MultipleContextAnalyzer(*c_context)
    Context.bray_curtis(skip_element=-1, do_skip=True)
    Context.dissimilarity()

    context_list[0] += [Context.distance[0] * 100]
    context_list[1] += [Context.distance[1] * 100]

    # Same Bray-Curtis computation on the uncentred sentences.
    Context: MultipleContextAnalyzer = MultipleContextAnalyzer(*context)
    Context.bray_curtis(skip_element=-1, do_skip=True)

    context_list[2] += [Context.distance[1] * 100]

context_list = np.array(context_list)
In [ ]:
# Build, for every Peitsch code, the HTML text shown when hovering its marker.
hover_rows: "list[str]" = []

for i, p_i in enumerate(characteristic[0]):
    hover_rows.append(
        f"<b>{p_i}</b><br /><br />"
        f"Occurrences number in domains: {characteristic[5][i]}<br />"
        f"Regular secondary structure: {ssr_code[i]}<br />"
        f"Hydrophobic score: {characteristic[2][i]}<br />"
        f"Cluster size: {characteristic[3][i]}<br />"
        f"Vector's norm: {norm[i]:.3f}<br />"
        f"<b>[MULTIPLE] </b>Context compute with a dissimilarity (%): {context_list[0][i]:.1f}<br />"
        f"<b>[MULTIPLE] </b>Bray-Curtis distance (%): {context_list[1][i]:.1f}<br />"
        f"<b>[MULTIPLE] </b>Bray-Curtis distance compute on domain (%): {context_list[2][i]:.1f}<br />"
        f"<b>[PAIR-WISE] </b>O(NP) (%): {sort_context[i][0]:.1f}<br />"
        f"<b>[PAIR-WISE | a] </b>O(NP) (%): {sort_context[i][1]:.1f}<br />"
        f"<b>[PAIR-WISE | b] </b>O(NP) (%): {sort_context[i][2]:.1f}<br />"
        f"<b>[PAIR-WISE | c] </b>O(NP) (%): {sort_context[i][3]:.1f}<br />"
        f"<b>[PAIR-WISE | d] </b>O(NP) (%): {sort_context[i][4]:.1f}<br />"
        f"<b>[PAIR-WISE] </b>Bray-Curtis distance (%): {sort_context[i][5]:.1f}<br />"
        f"<b>[PAIR-WISE | a] </b>Bray-Curtis distance (%): {sort_context[i][6]:.1f}<br />"
        f"<b>[PAIR-WISE | b] </b>Bray-Curtis distance (%): {sort_context[i][7]:.1f}<br />"
        f"<b>[PAIR-WISE | c] </b>Bray-Curtis distance (%): {sort_context[i][8]:.1f}<br />"
        f"<b>[PAIR-WISE | d] </b>Bray-Curtis distance (%): {sort_context[i][9]:.1f}<br />"
    )

# Stored as an array so that it can be filtered with boolean masks.
h_template = np.array(hover_rows)

Testing distribution¶

In [ ]:
# Horizontal violin plot of all the cosine similarities.
cmap = colormaps["viridis"]

# Two colours sampled at both ends of the colormap; `fill` holds the same
# colours with the alpha channel lowered to 0.35.
color: object = cmap(np.linspace(0, 1, 2))
fill: object = color.copy()
fill[:, -1] = 0.35

cosine_violin = go.Violin(
    x=similarity,
    y0=" ",
    name="Cosine similarities",
    box_visible=True,
    line_width=1,
    line_color="#444",
    marker_color=f"rgba{tuple(color[0])}",
    marker_line_color="#444",
    marker_line_width=1,
    fillcolor=f"rgba{tuple(fill[0])}"
)

plot_distribution: object = go.Figure()
plot_distribution.add_trace(cosine_violin)

# Black frame drawn around the whole plotting area.
plot_distribution.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line={"color": "black", "width": 1}
)

plot_distribution.update_traces()

# General plot properties.
plot_distribution.update_layout(
    plot_bgcolor="white",
    legend_title="<b>Data distribution</b>",
    margin={"l": 30, "r": 30, "t": 30, "b": 30},
    font={"size": 14},
    xaxis_title="<b>Distribution</b>",
    yaxis_title="<b>Descriptors</b>"
)

# Axis lines.
plot_distribution.update_xaxes(showline=True, linewidth=1)
plot_distribution.update_yaxes(showline=True, linewidth=1)

# A single trace gets no legend by default: force it.
plot_distribution["data"][0]["showlegend"] = True

# Display, then export as an HTML fragment.
plot_distribution.show()

plot_distribution.write_html(
    f"/home/lrouaud/Téléchargements/cosine_distribution.html",
    full_html=False,
    include_plotlyjs="../../node_modules/plotly.js-dist-min/plotly.min.js"
)
In [ ]:
# Violin plots of the context distances computed for every Peitsch code.
# Set TO_PLOT to True to also draw the distance computed on the whole domain.
TO_PLOT: bool = False

plot_distribution: object = go.Figure()

cmap = colormaps["viridis"]

# One colour per displayed violin. The palette only grows to three colours
# when the optional third violin is requested, so the default figure keeps its
# two colours and the optional branch no longer indexes past the palette.
color: object = cmap(np.linspace(0, 1, 3 if TO_PLOT else 2))
# Same colours with the alpha channel lowered, used to fill the violins.
fill: object = np.array(color)
fill[:, -1] = 0.35

# NOTE: `context_list` already stores percentages (it is scaled by 100 when it
# is built), so it is plotted as is; scaling it again would display values 100
# times too large under the "(%)" labels.
plot_distribution.add_trace(go.Violin(
    y=context_list[0],
    x0="Dissimilarity (%)",
    name="Dissimilarity percentage",
    box_visible=True,
    line_width=1,
    line_color="#444",
    marker_color=f"rgba{tuple(color[0])}",
    marker_line_color="#444",
    marker_line_width=1,
    fillcolor=f"rgba{tuple(fill[0])}"
))

plot_distribution.add_trace(go.Violin(
    y=context_list[1],
    x0="Bray curtis distance (%)",
    name="Bray curtis distance",
    box_visible=True,
    line_width=1,
    line_color="#444",
    marker_color=f"rgba{tuple(color[1])}",
    marker_line_color="#444",
    marker_line_width=1,
    fillcolor=f"rgba{tuple(fill[1])}"
))

if TO_PLOT:
    plot_distribution.add_trace(go.Violin(
        y=context_list[2],
        x0="Bray curtis distance compute<br />on the whole domain (%)",
        name="Bray curtis distance<br />on the whole domain",
        box_visible=True,
        line_color=f"rgba{tuple(color[2])}",
        marker_color=f"rgba{tuple(fill[2])}"
    ))

# Add the rectangle border.
plot_distribution.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line=dict(width=1, color="black")
)

# Modify general plot properties.
plot_distribution.update_layout(
    plot_bgcolor="white",
    legend_title="<b>Data distribution</b>",
    margin=dict(l=30, r=30, t=30, b=30),
    font=dict(size=14),
    xaxis_title="<b>Descriptors</b>",
    yaxis_title="<b>Distribution</b>"
)

# Modify axis properties.
plot_distribution.update_xaxes(showline=True, linewidth=1)
plot_distribution.update_yaxes(showline=True, linewidth=1)

plot_distribution["data"][0]["showlegend"] = True

# Show the plot.
plot_distribution.show()

plot_distribution.write_html(
    "/home/lrouaud/Téléchargements/hard_context_distribution.html",
    full_html=False,
    include_plotlyjs="../../node_modules/plotly.js-dist-min/plotly.min.js"
)
In [ ]:
from plotly.subplots import make_subplots

def plot_distribution(
    x_data: np.ndarray,
    y_data: np.ndarray,
    title: str,
    x_axis_title: str,
    y_axis_title: str,
    html: str = "",
    xaxis_lim: "list[int] | None" = None,
    yaxis_lim: "list[int] | None" = None,
    to_return: bool = False,
    multiple_plot: bool = False,
    annotated_codes: "tuple[int, ...]" = (147, 201)
) -> "go.Figure | None":
    """Scatter `y_data` against `x_data`, one trace per secondary structure.

    The function reads the notebook globals `ssr` (integer class of each
    point), `ssr_code` (class labels), `h_template` (hover texts) and
    `peitsch` (Peitsch code of each point); `x_data` and `y_data` must be
    aligned with them.

    Parameters
    ----------
    x_data : np.ndarray
        Values on the x axis.
    y_data : np.ndarray
        Values on the y axis.
    title : str
        Legend title.
    x_axis_title : str
        Title of the x axis.
    y_axis_title : str
        Title of the y axis.
    html : str, optional
        File name of the HTML export. Nothing is written when it is empty or
        when `to_return` is True.
    xaxis_lim : list[int] | None, optional
        Range of the x axis; left to plotly when None.
    yaxis_lim : list[int] | None, optional
        Range of the y axis; left to plotly when None.
    to_return : bool, optional
        When True, return the figure instead of showing/exporting it.
    multiple_plot : bool, optional
        When True, build a 1 x 3 subplot grid whose first panel spans two
        columns and put the scatter traces in it; the border is then left to
        the caller.
    annotated_codes : tuple[int, ...], optional
        Peitsch codes pointed out with an arrow. Each one must be present in
        `peitsch`.

    Returns
    -------
    go.Figure | None
        The figure when `to_return` is True, otherwise None.

    Raises
    ------
    IndexError
        If one of `annotated_codes` is absent from `peitsch`.
    """
    # The figure gets its own name so that it does not hide the function.
    if multiple_plot:
        figure = make_subplots(
            rows=1,
            cols=3,
            specs=[[{"colspan": 2}, None, {}]]
        )
        # Scatter traces go into the wide left panel.
        trace_position: dict = dict(row=1, col=1)
    else:
        figure = go.Figure()
        trace_position = {}

    cmap = colormaps["viridis"]
    ssr_set: list = sorted(set(ssr_code))

    # One colour per class, taken from the colormap in reverse order.
    color: np.ndarray = np.flip(
        cmap(np.linspace(0, 1, len(ssr_set))),
        axis=0
    )

    for i in sorted(set(ssr)):
        mask = ssr == i

        figure.add_trace(go.Scatter(
            x=x_data[mask],
            y=y_data[mask],
            name=ssr_set[i],
            mode="markers",
            marker=dict(
                color=f"rgba{tuple(color[i])}",
                line=dict(color="#444", width=1)
            ),
            hovertemplate=h_template[mask]
        ), **trace_position)

    # Modify general plot properties.
    figure.update_layout(
        plot_bgcolor="white",
        legend_title=title,
        margin=dict(l=30, r=30, t=30, b=30),
        font=dict(size=12),
        xaxis_title=x_axis_title,
        yaxis_title=y_axis_title
    )

    # Identity test: `!= None` would be evaluated element-wise on an array.
    if xaxis_lim is not None:
        figure.update_layout(xaxis_range=xaxis_lim)

    if yaxis_lim is not None:
        figure.update_layout(yaxis_range=yaxis_lim)

    if not multiple_plot:
        # Add the rectangle border.
        figure.add_shape(
            type="rect",
            xref="paper",
            yref="paper",
            x0=0,
            y0=0,
            x1=1,
            y1=1,
            line=dict(color="black", width=1)
        )

    # Modify axis properties.
    figure.update_xaxes(showline=True, linewidth=1)
    figure.update_yaxes(showline=True, linewidth=1)

    figure["data"][0]["showlegend"] = True

    # Point out the codes of interest.
    for i_code in annotated_codes:
        figure.add_annotation(
            x=x_data[peitsch == i_code][0],
            y=y_data[peitsch == i_code][0],
            text=f"<b> {i_code}</b>",
            showarrow=True,
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="black",
            font_color="black",
            align="center",
            font=dict(size=14),
            bgcolor="rgba(255, 255, 255, 0.6)"
        )

    if to_return:
        return figure

    # Show the plot.
    figure.show()

    if html != "":
        figure.write_html(
            f"/home/lrouaud/Téléchargements/{html}",
            full_html=False,
            include_plotlyjs=("../../node_modules/plotly.js-dist-min/"
                              "plotly.min.js")
        )

    return None
In [ ]:
# Distance (%) against log10(frequency) on the left, distribution of the two
# Peitsch codes of interest on the right, with a dropdown menu to switch
# between the columns 5 to 9 of `sort_context`.
distribution_data: object = np.load("../../../data/distribution_list.npy",
                                    allow_pickle=True)

# Codes whose distribution is drawn in the right panel.
PEITSCH_DISTRIBUTION: "list[int]" = [147, 201]

# One dropdown button per plotted column.
button: "list" = []
set_ssr: list = list(set(ssr))
set_ssr.sort()

for i in range(5, 10):
    # The figure itself is only built once, with the first column.
    if i == 5:
        to_plot = plot_distribution(
            x_data=frequency,
            y_data=sort_context[:, i],
            title=("<b>LEFT:</b> Distance<br />in function of log<sub>10</sub><br />of the frequency<br />"
                   "<b>RIGHT:</b> Context distri-<br />bution"),
            x_axis_title="<b>log<sub>10</sub>(frequencies)</b>",
            y_axis_title="<b>Distance (%)</b>",
            yaxis_lim=[sort_context.min() * 0.99, sort_context.max() * 1.01],
            to_return=True,
            multiple_plot=True
        )

        # Grey out the frequency ranges below 1.8 and above 2.8.
        to_plot.add_vrect(
            x0=frequency.min() * 0.99,
            x1=1.8,
            fillcolor="Lightgrey",
            layer="below",
            line_width=0
        )

        to_plot.add_vrect(
            x0=2.8,
            x1=frequency.max() * 1.01,
            fillcolor="Lightgrey",
            layer="below",
            line_width=0
        )

        for j, peitsch_i in enumerate(PEITSCH_DISTRIBUTION):
            x = distribution_data[j][i - 5]
            # Fill the plot with transparent colours.
            # NOTE(review): `cmap` is not defined in this cell; it is the
            # global left by a previously executed cell.
            color: object = cmap(np.linspace(0, 1, len(PEITSCH_DISTRIBUTION)))
            fill: object = np.array(color)
            fill[:, -1] = 0.35

            # Those next plot are shown by default.
            # Add a violin plot.
            to_plot.append_trace(go.Violin(
                y=x,
                x0=f"{peitsch_i}",
                name=peitsch_i,
                showlegend=True,
                line_width=1,
                line_color="#444",
                marker_color=f"rgba{tuple(color[j])}",
                marker_line_color="#444",
                marker_line_width=1,
                fillcolor=f"rgba{tuple(fill[j])}",
                span=[x.min(), x.max()],
                points=False
            ), row=1, col=3)

            # Add a box plot to have MEAN and SD values.
            to_plot.append_trace(go.Box(
                y=x,
                x0=f"{peitsch_i}",
                name=peitsch_i,
                showlegend=False,
                line_width=1.5,
                line_color="#444",
                marker_color=f"rgba{tuple(color[j])}",
                marker_line_color="#444",
                marker_line_width=1,
                fillcolor=f"rgba{tuple(fill[j])}",
                boxmean="sd",
                jitter=0.5
            ), row=1, col=3)

    # New `y` values of every trace for this button, in the order the traces
    # were added: one scatter per secondary structure class first...
    data_list: "list" = []

    for ssr_j in set_ssr:
        data_list += [sort_context[:, i][ssr == ssr_j]]

    # ...then, for each code, the same distribution twice (violin and box).
    for j, peitsch_i in enumerate(PEITSCH_DISTRIBUTION):
        data_list += [distribution_data[j][i - 5]] * 2

    # Arrows pointing at the codes of interest, moved to the new column.
    annotation_list: list = []

    for i_code in [147, 201]:
        annotation_list += [
            dict(
                x=frequency[peitsch == i_code][0],
                y=sort_context[:, i][peitsch == i_code][0],
                # NOTE(review): `y2` does not look like a plotly annotation
                # attribute — confirm whether it is needed.
                y2=[1],
                text=f"<b> {i_code}</b>",
                showarrow=True,
                arrowhead=2,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor=f"black",
                font_color="black",
                align="center",
                font=dict(size=14),
                bgcolor="rgba(255, 255, 255, 0.6)"
            )
        ]

    # The first column is labelled "ALL"; the others take their label from the
    # second row of `context_mean`.
    if i == 5:
        label: str = "ALL"
    else:
        label: str = context_mean[1][i + 1].upper()

    # NOTE(review): four extra `6` values are appended after the per-trace
    # data; their purpose is not visible from this cell — confirm.
    button += [dict(
        args=[
            dict(y=data_list + [6] * 4),
            dict(annotations=annotation_list),
        ],
        label=label,
        method="update"
    )]

# Dropdown menu anchored at the bottom, to the right of the plots.
update_menu = [dict(
    buttons=button,
    type="dropdown",
    direction="up",
    showactive=True,
    x=1.01,
    xanchor="left",
    y=0,
    yanchor="bottom",
    font_color="black"
)]

# Add the rectangle border.
to_plot.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=0.62,
    y1=1,
    line=dict(color="black", width=1)
)

# Add the rectangle border.
to_plot.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0.72,
    y0=0,
    x1=1,
    y1=1,
    line=dict(color="black", width=1)
)

to_plot.update_layout(
    updatemenus=update_menu,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    xaxis2_title="<b>Peitsch code</b>",
    yaxis2_title="<b>Distance (%)</b>",
    xaxis_title_standoff=1,
    xaxis2_title_standoff=1,
    yaxis_title_standoff=1,
    yaxis2_title_standoff=1
)
to_plot.show()

to_plot.write_html(
    f"/home/lrouaud/Téléchargements/distance_vs_log_frequency.html",
    full_html=False,
    include_plotlyjs="../../node_modules/plotly.js-dist-min/plotly.min.js"
)
In [ ]:
# Distance (%) against the raw occurrences, with a dropdown menu to switch
# between the ten columns of `sort_context`.
button: "list" = []
set_ssr: list = list(set(ssr))
set_ssr.sort()

for i in range(10):
    # The figure itself is only built once, with the first column.
    if i == 0:
        to_plot: object = plot_distribution(
            x_data=domain,
            y_data=sort_context[:, i],
            title=f"<b>Vectors' norms in<br />function of the context<br />conservation</b>",
            x_axis_title="<b>Frequencies</b>",
            y_axis_title="<b>Distance (%)</b>",
            yaxis_lim=[sort_context.min() * 0.99, sort_context.max() * 1.01],
            to_return=True,
            multiple_plot=False
        )

    # New `y` values of every scatter trace for this button: one entry per
    # secondary structure class, in sorted order.
    data_list: "list" = []

    for ssr_j in set_ssr:
        data_list += [sort_context[:, i][ssr == ssr_j]]

    # Arrows pointing at the codes of interest, moved to the new column.
    annotation_list: list = []

    for i_code in [105, 147, 201, 921]:
        annotation_list += [
            dict(
                x=domain[peitsch == i_code][0],
                y=sort_context[:, i][peitsch == i_code][0],
                text=f"<b> {i_code}</b>",
                showarrow=True,
                arrowhead=2,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor=f"black",
                font_color="black",
                align="center",
                font=dict(size=14),
                bgcolor="rgba(255, 255, 255, 0.6)"
            )
        ]

    # Labels come from the first two rows of `context_mean`; columns 0 and 5
    # only use the first row.
    if i in [0, 5]:
        label: str = f"{context_mean[0][i + 1]}"
    else:
        label: str = f"{context_mean[0][i + 1]} - {context_mean[1][i + 1]} SCOPe class"

    button += [dict(
        args=[
            dict(y=data_list),
            dict(annotations=annotation_list)
        ],
        label=label,
        method="update"
    )]

# Dropdown menu anchored at the bottom, to the right of the plot.
update_menu = [dict(
    buttons=button,
    type="dropdown",
    direction="up",
    showactive=True,
    x=1.01,
    xanchor="left",
    y=0,
    yanchor="bottom",
    font_color="black"
)]

to_plot.update_layout(updatemenus=update_menu)
to_plot.show()

to_plot.write_html(
    f"/home/lrouaud/Téléchargements/distance_vs_frequency.html",
    full_html=False,
    include_plotlyjs="../../node_modules/plotly.js-dist-min/plotly.min.js"
)
In [ ]:
# Vectors' norms against log10(frequency).
to_plot = plot_distribution(
    x_data=frequency,
    y_data=norm,
    title="<b>Vectors' norms in<br />function of the log<sub>10</sub><br />of the frequencies</b>",
    x_axis_title="<b>log<sub>10</sub>(frequencies)</b>",
    y_axis_title="<b>Vectors' norms</b>",
    to_return=True,
    multiple_plot=False
)

# Grey bands over the frequency ranges below 1.8 and above 2.8, drawn in the
# same order as before (low band first).
grey_bands = (
    (frequency.min() * 0.99, 1.8),
    (2.8, frequency.max() * 1.01),
)

for band_start, band_end in grey_bands:
    to_plot.add_vrect(
        x0=band_start,
        x1=band_end,
        fillcolor="Lightgrey",
        layer="below",
        line_width=0
    )

# Hide the grid on both axes.
to_plot.update_layout(xaxis_showgrid=False, yaxis_showgrid=False)

to_plot.show()

# Export as an HTML fragment.
to_plot.write_html(
    f"/home/lrouaud/Téléchargements/vector_log10.html",
    full_html=False,
    include_plotlyjs="../../node_modules/plotly.js-dist-min/plotly.min.js"
)
In [ ]:
# Vectors' norms against the surprise of each Peitsch code; shown only (no
# HTML file name is given).
plot_distribution(
    x_data=surprise,
    y_data=norm,
    title="<b>Vectors' norms in function<br />of the Peitsch code's surprise</b>",
    x_axis_title="<b>Surprise</b>",
    y_axis_title="<b>Vectors' norms</b>"
)

Checking vector norms versus a context¶

In [ ]:
# Vectors' norms against the context dissimilarity (already a percentage).
# Only the x axis is a percentage: the norms on the y axis are raw values, so
# the y title carries no "(%)", like the other norm plots.
plot_distribution(
    x_data=context_list[0],
    y_data=norm,
    title="<b>Vectors' norms in function<br />of the dissimilarity</b>",
    x_axis_title="<b>Context compute with a dissimilarity (%)</b>",
    y_axis_title="<b>Vectors' norms</b>",
    html="dissimilarity.html"
)
In [ ]:
# Vectors' norms against the Bray-Curtis distance of the centred contexts;
# shown and exported to "bray_curtis.html".
plot_distribution(
    x_data=context_list[1],
    y_data=norm,
    title="<b>Vectors' norms in function<br />of the Bray-Curtis distance</b>",
    x_axis_title="<b>Bray-Curtis distance (%)</b>",
    y_axis_title="<b>Vectors' norms</b>",
    html="bray_curtis.html"
)
In [ ]:
# Vectors' norms against the Bray-Curtis distance computed on the uncentred
# sentences; shown only (no HTML file name is given).
plot_distribution(
    x_data=context_list[2],
    y_data=norm,
    title="<b>Vectors' norms in function<br />of the Bray-Curtis distance<br />compute on domain</b>",
    x_axis_title="<b>Bray-Curtis distance compute on domain (%)</b>",
    y_axis_title="<b>Vectors' norms</b>"
)

Mean vector¶

In [ ]:
# Quiver-like plot of every embedding vector and of their mean, projected on
# the first two principal components.

# Codes drawn with a different arrow colour and labelled on the plot.
HIGHLIGHT_CODES: "tuple[int, ...]" = (105, 147, 201, 921)

n_vectors: int = len(data)

# The mean vector is stacked LAST: row `i` of the projection then matches
# `peitsch[i]` and `h_template[i]`, and row `n_vectors` is the mean. Stacking
# it first shifted every arrow, label and hover text by one row and drew the
# last embedding in place of the mean.
f_data: np.ndarray = np.vstack([data, np.mean(data, axis=0)])

# Do a PCA.
pca = PCA(n_components=2)
transform_data_pca: np.ndarray = np.transpose(
    pca.fit(f_data).transform(f_data)
)

# Explained variance of both components, as rounded percentages.
ratio_1: int = round(pca.explained_variance_ratio_[0] * 100)
ratio_2: int = round(pca.explained_variance_ratio_[1] * 100)

cmap = colormaps["viridis"]
color: object = cmap(np.linspace(0, 1, 4))

# Zero-length quiver: it only provides the figure and the "Vector" legend
# entry, the arrows themselves are drawn as annotations.
vector_plot: object = ff.create_quiver(
    x=[0],
    y=[0],
    u=[0],
    v=[0],
    name="Vector",
    line_color=f"rgba{tuple(color[2])}",
    line_width=2
)

# [x, y, text] of the labels to add once all the arrows are drawn.
annotation_list: "list" = []

for i, x_i in enumerate(tqdm(transform_data_pca[0][:n_vectors])):
    y_i = transform_data_pca[1][i]
    is_highlighted: bool = peitsch[i] in HIGHLIGHT_CODES

    if is_highlighted:
        arrow_color: str = f"rgba{tuple(color[1])}"
        annotation_list.append([x_i, y_i, f"<b>{peitsch[i]}</b>"])
    else:
        arrow_color: str = f"rgba{tuple(color[2])}"

    # Arrow from the origin to the projected vector.
    vector_plot.add_annotation(
        ax=0,
        ay=0,
        x=x_i,
        y=y_i,
        xref="x",
        yref="y",
        axref="x",
        ayref="y",
        arrowhead=2,
        showarrow=True,
        arrowcolor=arrow_color,
        arrowsize=1,
        arrowwidth=1.5,
        text=""
    )

# Add the rectangle border.
vector_plot.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line=dict(color="black", width=1)
)

# Modify general plot properties.
vector_plot.update_layout(
    plot_bgcolor="white",
    legend_title="<b>Quiver plot of obtain<br />embedding</b>",
    margin=dict(l=30, r=30, t=30, b=30),
    font=dict(size=14),
    xaxis_title=f"<b>X components ({ratio_1} %)</b>",
    yaxis_title=f"<b>Y components ({ratio_2} %)</b>"
)

# Arrow of the mean vector (last row of the projection).
vector_plot.add_annotation(
    ax=0,
    ay=0,
    x=transform_data_pca[0][n_vectors],
    y=transform_data_pca[1][n_vectors],
    xref="x",
    yref="y",
    axref="x",
    ayref="y",
    arrowhead=2,
    text="",
    showarrow=True,
    arrowcolor=f"rgba{tuple(color[0])}",
    arrowsize=1.5,
    arrowwidth=2
)

# Modify axis properties.
vector_plot.update_xaxes(showline=True, linewidth=1, range=[-1, 1.1])
vector_plot.update_yaxes(showline=True, linewidth=1)

# Empty trace: only there to add a "Mean vector" entry to the legend.
vector_plot.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode="lines",
    name="Mean vector",
    line=dict(color=f"rgba{tuple(color[0])}", width=2),
))

# Fully transparent markers at the tip of each arrow: they carry the hover
# text, which annotations cannot display.
vector_plot.add_trace(go.Scatter(
    x=transform_data_pca[0][:n_vectors],
    y=transform_data_pca[1][:n_vectors],
    mode="markers",
    marker=dict(color=f"rgba{tuple(list(color[1][:3]) + [0])}", size=10),
    name="<b>[hover data]</b>",
    showlegend=False,
    hovertemplate=h_template
))

# Labels of the highlighted codes, added last so they sit above the arrows.
for annotation in annotation_list:
    vector_plot.add_annotation(
        x=annotation[0],
        y=annotation[1],
        text=annotation[2],
        showarrow=False,
        font_color="black",
        align="center",
        font=dict(size=14),
        bgcolor="rgba(255, 255, 255, 0.6)"
    )

# Show the plot.
vector_plot.show()

vector_plot.write_html(
    "/home/lrouaud/Téléchargements/vector_plot.html",
    full_html=False,
    include_plotlyjs="../../node_modules/plotly.js-dist-min/plotly.min.js"
)
  0%|          | 0/325 [00:00<?, ?it/s]
In [ ]:
# Violin plot of the components of the mean embedding vector.
# The figure has its own name: binding it to `plot_distribution` would replace
# the plotting helper of the same name, and re-running any cell calling that
# helper would then fail.
mean_vector_distribution: object = go.Figure()

cmap = colormaps["viridis"]

# Two colours sampled at both ends of the colormap; `fill` holds the same
# colours with the alpha channel lowered to 0.35.
color: object = cmap(np.linspace(0, 1, 2))
fill: object = np.array(color)
fill[:, -1] = 0.35

mean_vector_distribution.add_trace(go.Violin(
    x=np.mean(data, axis=0),
    y0=" ",
    name="Mean vector",
    box_visible=True,
    line_width=1,
    line_color="#444",
    marker_color=f"rgba{tuple(color[0])}",
    marker_line_color="#444",
    marker_line_width=1,
    fillcolor=f"rgba{tuple(fill[0])}"
))

# Add the rectangle border.
mean_vector_distribution.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line=dict(color="black", width=1)
)

# Modify general plot properties.
mean_vector_distribution.update_layout(
    plot_bgcolor="white",
    legend_title="<b>Data distribution in<br />the mean vector</b>",
    margin=dict(l=30, r=30, t=30, b=30),
    font=dict(size=14),
    xaxis_title="<b>Distribution</b>",
    yaxis_title="<b>Descriptors</b>"
)

# Modify axis properties.
mean_vector_distribution.update_xaxes(showline=True, linewidth=1)
mean_vector_distribution.update_yaxes(showline=True, linewidth=1)

# A single trace gets no legend by default: force it.
mean_vector_distribution["data"][0]["showlegend"] = True

# Show the plot.
mean_vector_distribution.show()